/* d_clean_test_data.do -*******************************************************

	This file links student SASIDs (the Massachusetts state IDs for students) to
	test data and reshapes the data to be unique on SASID.

******************************************************************************/
* ---- Run-time configuration --------------------------------------------------
* Turn off the -more- pager so the script runs without pausing for input.
set more off

* List of two-digit codes; not referenced anywhere else in the visible script.
local mcas_years 03 04 05 06 07 08 10

* Variable-name prefixes looped over in the MCAS section
* (e.g. eteststat / mteststat, estdrawsc / mstdrawsc).
local mcas_tests e m

* Section switches: each section below runs only when its switch equals 1.
global merge_test 1
global sat_reshape 1
global mcas_reshape 1

/**************************** MCAS Reshaping **********************************/
if $mcas_reshape == 1 {
	* ---- Load the raw MCAS file and build a numeric student id --------------
	use "$stata_data_mcas/mcas_full.dta", clear
	destring sasid, generate(sasid_no) force
	* -force- turns non-numeric ids into missing; such rows cannot be keyed.
	drop if missing(sasid_no)

	* ---- Per-prefix "took the test" flag and scores -------------------------
	foreach subj of local mcas_tests {
		* 1 when any row of the student-grade has test status "T", else 0.
		bysort sasid_no grade: egen took`subj' = max(`subj'teststat == "T")

		* A few students have several scores within one grade (at separate
		* schools). The stable sort pins the row order so that [1] below
		* always picks the same row of each group.
		sort sasid_no grade `subj'teststat year `subj'stdrawsc `subj'stdscaleds, stable

		* Rows with status "T" and a non-missing score receive the score
		* found on the first row of their student-grade-status group.
		by sasid_no grade `subj'teststat: generate std_`subj' = `subj'stdrawsc[1] ///
			if `subj'teststat == "T" & !missing(`subj'stdrawsc)

		by sasid_no grade `subj'teststat: generate std_scaled_`subj' = `subj'stdscaleds[1] ///
			if `subj'teststat == "T" & !missing(`subj'stdscaleds)
	}

	* Overall flag: 1 if either per-prefix flag is 1.
	generate took_mcas = (tooke == 1) | (tookm == 1)
	generate mcas_year = year

	* ---- School-by-grade averages (written to a separate file) --------------
	preserve
		* Keep only the last four characters of the school code.
		replace school = substr(school, -4, length(school))

		* One row per school-student-grade holding the maximum of each score ...
		collapse (max) std_e std_m std_scaled_m std_scaled_e, by(school sasid grade)

		* ... then the mean of those maxima within each school-grade.
		collapse (mean) sch_std_e = std_e sch_std_m = std_m ///
			sch_std_scaled_m = std_scaled_m sch_std_scaled_e = std_scaled_e, ///
			by(school grade)

		* grade is still a string here; keep the listed codes, then convert.
		keep if inlist(grade, "04", "05", "06", "07", "08", "10")
		destring grade, replace

		save "$stata_data_mcas/school_mcas.dta", replace
	restore

	* ---- One row per student-grade ------------------------------------------
	collapse (max) took* std_* (first) mcas_year sasid, by(grade sasid_no)

	destring grade, replace

	* Add one grade-12 row per student so that -tsfill- has an end point to
	* fill up to.
	preserve
		keep sasid_no grade
		collapse (mean) grade, by(sasid_no)
		replace grade = 12
		tempfile grade12_stub
		save `grade12_stub'
	restore
	append using `grade12_stub'

	* Fill in the missing grades of every student so the panel can be reshaped.
	tsset sasid_no grade
	tsfill
	drop sasid

	* Remove the helper grades that were only needed for filling.
	keep if grade <= 10

	* Earliest grade in which the student has took_mcas == 1.
	tempvar first_tested
	bysort sasid_no took_mcas: egen `first_tested' = min(grade) if took_mcas == 1
	bysort sasid_no: egen earliest_mcas = min(`first_tested')
	drop `first_tested'

	* Long version: one row per student-grade.
	save "$stata_data_mcas/mcas_sasid_long.dta", replace

	* Wide version: one row per student, one set of columns per grade.
	reshape wide took* std_* mcas_year, i(sasid_no) j(grade)

	* Student-level flag: the largest of all grade-specific took* columns.
	egen took_mcas = rowmax(took*)
	save "$stata_data_mcas/mcas_sasid_wide.dta", replace

}
/**************************** SAT Reshape *************************************/
if $sat_reshape == 1 {
	* Load the raw SAT file and build the numeric student id used as merge key.
	* -force- converts any non-numeric sasid to missing.
	use "$stata_data_sat/sat_full.dta", clear
	destring sasid, gen(sasid_no) force

	* Sum of the two section scores.
	gen sat_reason = satvrecn + satmrecn

	* Distinct values of yearsat; cut-offs below are computed separately per value.
	levelsof yearsat, local(years_sat)

	local sat_tests sat_reason satvrecn satmrecn satw sattot c_sattot c_satw c_satvrecn c_satmrecn
	foreach sat_test of local sat_tests {

		* Indicators, by yearsat, for scoring strictly above the
		*   qt_1: 75th percentile   qt_2: median   qt_3: 25th percentile
		* They stay missing wherever the score itself is missing.
		gen qt_1_`sat_test' = .
		gen qt_2_`sat_test' = .
		gen qt_3_`sat_test' = .

		foreach year_sat of local years_sat {
			* Percentiles of this score among rows of this yearsat value.
			su `sat_test' if yearsat == `year_sat' , detail
			local top_quart = r(p75)
			local top_half = r(p50)
			local top_three_quart = r(p25)
			replace qt_1_`sat_test' = `sat_test' > `top_quart' if yearsat == `year_sat' & `sat_test' != .
			replace qt_2_`sat_test' = `sat_test' > `top_half' if yearsat == `year_sat' & `sat_test' != .
			replace qt_3_`sat_test' = `sat_test' > `top_three_quart' if yearsat == `year_sat' & `sat_test' != .

		}
	}

	* Rows whose sasid could not be converted have a missing key: they cannot
	* be linked to a student, and more than one of them makes the later
	* -merge 1:1 sasid_no- abort because the key is no longer unique. Drop
	* them, as the MCAS section does. This is done only after the cut-offs
	* above are computed, so the percentiles still use every row of the file.
	drop if mi(sasid_no)

	save "$stata_data_sat/sat_sasid_wide", replace
}

/**************************** Merge Test **************************************/
if $merge_test == 1 {
	* Master data: the MCAS file with one row per sasid_no.
	use "$stata_data_mcas/mcas_sasid_wide.dta", clear

	* Attach the SAT file on the numeric student id. No keep() option is
	* given, so matched and unmatched rows from both files are retained;
	* -nogenerate- suppresses the _merge indicator variable.
	merge 1:1 sasid_no using "$stata_data_sat/sat_sasid_wide", nogenerate

	* Combined test file.
	save "$stata_data/test_wide.dta", replace

}
